# Authors:	        	Carlos F. Gould; Xiaoxue Hou
# Institution:	    	Columbia University; Johns Hopkins University
# File Date: 	      	August 1, 2020
# Publication title:	Jointly modeling the adoption and use of LPG in rural India
# File purpose:	    	Make figures in the main text

# 1. Libraries ------

library(readstata13)
library("dplyr")
library("pscl")
library("mhurdle")
library("ordinal")
library(foreign)
library(ggplot2)
library(scales)
library(gridExtra)
library(grid)
library(ggpubr)
library(cowplot)
library(dabestr)
library(ggbeeswarm)
library(ggthemes)


# 2. Read in the data ------

# 2.1 Main ACCESS data ----
setwd("~/Downloads/") # set path to files
access <- haven::read_dta("access2018_2018Dec31.dta")


# 2.2 Main model double hurdle model outputs ------
# Produced from Stata code TwoModelAnalysis.do

setwd("~/Downloads/") # set path to files

consume_caste<-read.csv("consume_caste.csv")
select_caste<-read.csv("select_caste.csv")
consume_edu<-read.csv("consume_edu.csv")
select_edu<-read.csv("select_edu.csv")
consume_gender<-read.csv("consume_gender.csv")
select_gender<-read.csv("select_gender.csv")
consume_house<-read.csv("consume_house.csv")
select_house<-read.csv("select_house.csv")
consume_exp<-read.csv("consume_exp.csv")
select_exp<-read.csv("select_exp.csv")
consume_age<-read.csv("consume_age.csv")
select_age<-read.csv("select_age.csv")

consume_yrs <- read.csv("consume_lpgyear.csv")
consume_pmuy <- read.csv("consume_pmuy.csv")

###

access_deduplication <- access %>%
  distinct(finalhhid, .keep_all = T)

# 3. Create necessary variables ------

###Adoption of LPG
access["LPG"] <- ifelse(access$m4_q103_lpg == 1, 1,0)

access$fuel_stack<-NA #fuel stacking
access$fuel_stack[access$LPG==0]<-4 #no adoption
access$fuel_stack[(access$LPG==1) &
                    (access$m5_q118_main_cookfuel==3) &
                    (access$m4_q109_firewood==0& 
                       ((access$m4_q113_dungcake==0) & 
                          ((access$m4_q114_agro==0) & 
                             (access$m4_q115_other_fuel==0))))]<-1 #exclusive use

access$fuel_stack[is.na(access$fuel_stack)] <- ifelse(access$m5_q118_main_cookfuel[is.na(access$fuel_stack)]==3, 2, 3)
# label family who use LPG as primary fuel but also stack fuels 2
# label family who do not use LPG as primary fuel but as one of the stack fuels 3

access$LPG_exclusive_use<-ifelse(access$fuel_stack==1,1,0)
access$Fuel_stacking_with_LPG_as_Primary<-ifelse(access$fuel_stack==2,1,0)
access$Fuel_stacking_with_Other_Fuels_as_Primary<-ifelse(access$fuel_stack==3,1,0)
access$No_Adoption_of_LPG<-ifelse(access$fuel_stack==4,1,0)

# test for # of observations of ele_stove/kero-stove
unique(access$m4_q115_other_fuel_s)
access["ele_stove"]<-ifelse(access$m4_q115_other_fuel_s=="INDUCTION STOVE",1,0)
sum(access$ele_stove)

# access["kero_stove"]<-ifelse(access$m4_q115_other_fuel_s=="KEROSENE STOVE"|
#                                access$m4_q115_other_fuel_s=="KEROSENE"|
#                                access$m4_q115_other_fuel_s=="KEROSENE OIL"|
#                                access$m4_q115_other_fuel_s=="KEROSENE OIL FOR STOVE"|
#                                access$m4_q115_other_fuel_s=="HEATER AND KEROSENE STOVE",1,0)
# sum(access$kero_stove)

###Calculate monthly LPG consumption
access$m4_q103_4_lpg_lcyl[is.na(access$m4_q103_4_lpg_lcyl)]<-0
access$m4_q103_7_lpg_scyl[is.na(access$m4_q103_7_lpg_scyl)]<-0
access$lpg_consumption<-(access$m4_q103_4_lpg_lcyl*14200+access$m4_q103_7_lpg_scyl*5000)/12
access$lpg_log<-log(access$lpg_consumption+1)
#LARGE LPG CYLINDER IS 14.2 KG; SMALL LPG CYLINDER IS 5 KG

#take out NAs in expenditure
#access<-subset(access, !is.na(access$m1_q32_month_expenditure))


###Log transform of monthly expenditure
access$month_expenditure_log<-log(access$m1_q32_month_expenditure + 1)

###Create caste indicators
access["Caste_SC"] <- ifelse(access$m1_q25_caste == 1, 1, 0)
access["Caste_ST"] <- ifelse(access$m1_q25_caste == 2, 1, 0)
access["Caste_OBC"]<-ifelse(access$m1_q25_caste == 3, 1, 0)
access["Caste_General"] <- ifelse(access$m1_q25_caste == 4|access$m1_q25_caste == 5, 1, 0)

access$Caste[access$m1_q25_caste == 1]<- 4
access$Caste[access$m1_q25_caste == 2]<- 3
access$Caste[access$m1_q25_caste == 3]<- 2
access$Caste[access$m1_q25_caste == 4|access$m1_q25_caste == 5]<- 1

access$Caste<-as.factor(access$Caste)

###Create education indicators
access["Edu_NoFormalSchooling"] <- ifelse(access$m1_q23_edu == 1, 1, 0)
access["Edu_UpTo5thStandard"] <- ifelse(access$m1_q23_edu == 2, 1, 0)
access["Edu_MoreThan5thStandard"] <- ifelse(access$m1_q23_edu == 3 | 
                                               access$m1_q23_edu == 4 |
                                               access$m1_q23_edu == 5, 1, 0)

access$Education[access$m1_q23_edu == 1] <- 1
access$Education[access$m1_q23_edu == 2] <- 2

access$Education[access$m1_q23_edu == 3 | 
                   access$m1_q23_edu == 4 |
                   access$m1_q23_edu == 5] <- 3

access$Education<-access$Education
  
###Create religion indicators
access["Religion_Hindu"] <- ifelse(access$m1_q24_religion == 1, 1, 0)
access["Religion_Other"] <- ifelse(access$m1_q24_religion == 2 | access$m1_q24_religion == 3, 1, 0)

access["Religion"] <- ifelse(access$m1_q24_religion == 1, 2, 1)

###Create household size
access$Numpers_house <- access$m1_q27_no_adults + access$m1_q29_no_children

###Create Gender Effect
access["Decision_MaleHouseholdHead"] <- ifelse(access$m1_q38_decision_maker==1, 1, 0)
access["Decision_FemaleHouseholdHead"] <- ifelse(access$m1_q38_decision_maker==2, 1, 0)
access["Decision_Both"] <- ifelse(access$Decision_MaleHouseholdHead==0 & 
                                    access$Decision_FemaleHouseholdHead==0, 1, 0)

access$Decision_Maker <- ifelse(access$m1_q38_decision_maker==1, 1, 
                                ifelse(access$m1_q38_decision_maker==2, 2, 3))


###Create Gender Effect
access["Decision_MaleHouseholdHead"] <- ifelse(access$m1_q38_decision_maker==1, 1, 0)
access["Decision_FemaleHouseholdHead"] <- ifelse(access$m1_q38_decision_maker==2, 1, 0)
access["Decision_Both"] <- ifelse(access$Decision_MaleHouseholdHead==0 & 
                                    access$Decision_FemaleHouseholdHead==0, 1, 0)

access$Decision_Maker <- ifelse(access$m1_q38_decision_maker==1 |
                                  access$m1_q38_decision_maker_other=="Brother" |
                                  access$m1_q38_decision_maker_other=="BROTHER" |
                                  access$m1_q38_decision_maker_other=="Brother of HH Head" |
                                  access$m1_q38_decision_maker_other=="Elder brother" |
                                  access$m1_q38_decision_maker_other=="Father of HH head" |
                                  access$m1_q38_decision_maker_other=="Grand father" |
                                  access$m1_q38_decision_maker_other=="GRAND FATHER" |
                                  access$m1_q38_decision_maker_other=="Grand son" |
                                  access$m1_q38_decision_maker_other=="GRAND SON" |
                                  access$m1_q38_decision_maker_other=="Nephew" |
                                  access$m1_q38_decision_maker_other=="Son" |
                                  access$m1_q38_decision_maker_other=="SON" |
                                  access$m1_q38_decision_maker_other=="Son-in-law" |
                                  access$m1_q38_decision_maker_other=="Sons" |
                                  access$m1_q38_decision_maker_other=="Uncle", 1, 
                                ifelse(access$m1_q38_decision_maker==2 |
                                         access$m1_q38_decision_maker_other=="AUNTI" |
                                         access$m1_q38_decision_maker_other=="Daughter" |
                                         access$m1_q38_decision_maker_other=="DAUGHTER" |
                                         access$m1_q38_decision_maker_other=="DAUGHTER IN LAW" |
                                         access$m1_q38_decision_maker_other=="Daughter-in-law" |
                                         access$m1_q38_decision_maker_other=="Mother" |
                                         access$m1_q38_decision_maker_other=="MOTHER" |
                                         access$m1_q38_decision_maker_other=="Mother of HH head" |
                                         access$m1_q38_decision_maker_other=="NIECE" |
                                         access$m1_q38_decision_maker_other=="Sister-in-law", 2, 3))


###State FE
access["State_UttarPradesh"]<-ifelse(access$m1_q8_state=="UTTAR PRADESH", 1, 0)
access["State_Bihar"]<-ifelse(access$m1_q8_state=="BIHAR", 1, 0)
access["State_WestBengal"]<-ifelse(access$m1_q8_state=="WEST BENGAL", 1, 0)
access["State_Jharkhand"]<-ifelse(access$m1_q8_state=="JHARKHAND", 1, 0)
access["State_Odisha"]<-ifelse(access$m1_q8_state=="ODISHA", 1, 0)
access["State_MadhyaPradesh"]<-ifelse(access$m1_q8_state=="MADHYA PRADESH", 1, 0)

###Time FE
access["Round2"]<-ifelse(access$year=="2018", 1, 0)
access["Round1"]<-ifelse(access$year=="2015", 1, 0)
access$round<-ifelse(access$year=="2018",1,0)

access_nona<-subset(access, !is.na(access$m1_q32_month_expenditure))
# 64 NAs in 2018


# 4. Make figures --------


# 4.1 Figure 2 ----------

# convert grams to kilograms
access$lpg_consumption_kg <- access$lpg_consumption / 1000

# reclassify fuel stacking category to formatting for figure
access$fuel_stack_character <- ifelse(access$fuel_stack==1, "Exclusive LPG", 
                                      ifelse(access$fuel_stack==2, "Primary: LPG \nSecondary: Solid Fuel",
                                             ifelse(access$fuel_stack==3, "Primary: Solid Fuel \nSecondary: LPG",
                                                    ifelse(access$fuel_stack==4, "Exclusive \nSolid Fuel Use", NA))))

# summary values for LPG owners in 2018 for annotation values on figure
access %>% 
  dplyr::select(fuel_stack, year, lpg_consumption_kg) %>%
  dplyr::filter(fuel_stack!=4 & year!=2015) %>%
  dplyr::group_by(fuel_stack) %>%
  dplyr::summarize(n=n(),
            mean = mean(lpg_consumption_kg, na.rm=T),
            sd = sd(lpg_consumption_kg, na.rm=T))

# nb: will take a minute to run
fig2_comparisons <- 
  access %>%
  mutate(lpg_consumption_kg = lpg_consumption / 1000) %>%
  filter(year != "2015" & fuel_stack_character != "Exclusive LPG") %>%
  dabestr::dabest(fuel_stack_character, lpg_consumption_kg, 
         idx = list(c("Primary: LPG \nSecondary: Solid Fuel",
                      "Primary: Solid Fuel \nSecondary: LPG")),
         paired = FALSE
  )

fig2_comparisons # for informing annotations

# main plot
fig2_1 <- ggplot(access %>% 
         filter(fuel_stack!=4 & year!=2015), 
       aes(y=lpg_consumption_kg, x=fuel_stack_character)) +
  geom_boxplot(width=0.5, size=0.6, alpha=1, outlier.size = 1, notch = TRUE) +
  stat_summary(fun=mean, geom="point", shape=5, size=3) +
  scale_y_continuous(trans="log10", breaks=c(0.50, 1, 2.5, 5, 14, 28)) +
  xlab("") + ylab("Monthly LPG Consumption (kg)") +
  coord_cartesian(ylim = c(0.50, 45), clip = 'off') + 
  ggthemes::theme_clean() + 
  theme(plot.margin=unit(c(1,0.5,0,0.3), "cm"),
        plot.background = element_blank(),
    plot.title = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_text(size=16),
        axis.text.x = element_text(size=14),
        axis.title.y = element_text(size=16),
        axis.line.x = element_blank(),
        axis.ticks.x = element_blank()) + 
  annotate("text", x=1.5, y=70, label= "Mean Difference: \n2.1 kg (95% CI: 1.8-2.4)",size=4) +
  geom_segment(mapping = aes(x=1.15, y=40, xend=1.85, yend=40), lineend = "butt") +
  annotate("text", x=2.5, y=70, label= "Mean Difference: \n3.9 kg (95% CI: 3.7-4.2)",size=4) +
  geom_segment(mapping = aes(x=2.15, y=40, xend=2.85, yend=40), lineend = "butt") 

# sample size labels
fig2_2 <- 
  ggplot() + 
  annotate("text", x=1.5, y=1, label= "N = 1,505",size=5) +
  annotate("text", x=2, y=1, label= "N = 1,543",size=5) +
  annotate("text", x=2.5, y=1, label= "N = 1,912",size=5) + 
  theme_classic() + 
  coord_cartesian(clip="off") + 
  theme(plot.title = element_blank(),
        plot.subtitle = element_blank(),
        axis.title = element_blank(), 
        legend.position = "none",
        legend.title = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.line = element_blank())

# combining two figures
fig2 <- ggdraw() +
  draw_plot(fig2_1, x = 0, y = 0.10, width = 1, height = 0.9) +
  draw_plot(fig2_2, x = 0.24, y = 0, width = 0.63, height = 0.1)

# Preview Figure 2 -----
fig2

# Save Figure 2 ------

# ggsave2("~/Figure2_lpg_consumption.png",
#         plot = fig2,
#         dpi = 150,
#         height = 110, width = 180, units = "mm")


# Figure 3: Two stage model SELECTION stage only --------

pd <- position_dodge(0.1) # move them .05 to the left and right

# Caste ----

select_caste <- select_caste[1:4,] # drop extra rows

fig_select_caste <- ggplot(select_caste, aes(x=Caste, y=AAP, group=Caste)) + 
  geom_point(position=pd, size=3, shape=4) + 
  geom_errorbar(data=select_caste, aes(ymin=lower, ymax=upper), width=.1, position=pd)+
  scale_y_continuous(limits=c(0,.75), breaks = c(0, 0.25, 0.50, 0.75), labels = scales::percent_format())+
  ggtitle("Caste")+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=16),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

fig_select_caste


# Education -----

select_edu$Education <- as.character(select_edu$Education)
select_edu$Education <- ifelse(select_edu$Education=="No Formal Schooling", "No Formal",
                               ifelse(select_edu$Education=="Up to 5th Standard", "Up to 5th",
                                      ifelse(select_edu$Education=="More than 5th Standard", "More than 5th",NA)))
select_edu$Education <- factor(select_edu$Education, levels=c("More than 5th", "Up to 5th", "No Formal"))


fig_select_edu <- ggplot(select_edu, aes(x=Education, y=AAP, group=Education)) + 
  geom_point(position=pd, size=3, shape=4) +
  geom_errorbar(data=select_edu, aes(ymin=lower, ymax=upper), width=.1, position=pd)+
  scale_y_continuous(limits=c(0,.75), breaks = c(0, 0.25, 0.50, 0.75), labels = scales::percent_format())+
  ggtitle("Household Head Education")+
  # ylab("Predicted Probability of LPG Adoption") +
  theme_clean()+ 
  theme(legend.position = "none",
        plot.background = element_blank(),
        axis.text.x = element_text(size=14),
        axis.text.y = element_text(size=14),
        axis.title = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

fig_select_edu

# Gender ----

select_gender$Gender <- factor(select_gender$Gender, levels=c("Woman",  "Both", "Man"))

fig_select_gender <- ggplot(select_gender, aes(x=Gender, y=AAP, group=Gender)) + 
  geom_point(position=pd, size=3, shape=4) +
  geom_errorbar(data=select_gender, aes(ymin=lower, ymax=upper), width=.1, position=pd)+
  scale_y_continuous(limits=c(0,.75), breaks = c(0, 0.25, 0.50, 0.75), labels = scales::percent_format())+
  coord_cartesian(clip="off") + 
  ggtitle("Decision Maker Gender")+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=16),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

# household size ----

select_house <- select_house %>%
  select(Household.Size, AAP, lower, upper) %>%
  top_n(25) %>%
  mutate(Household.Size = as.numeric(as.character(Household.Size)),
         AAP = as.numeric(as.character(AAP)),
         lower = as.numeric(as.character(lower)),
         upper = as.numeric(as.character(upper)))

fig_select_hhsize <- ggplot(data=select_house, aes(x=Household.Size, y=AAP)) + 
  geom_line(size=1.10)+
  geom_ribbon(aes(ymin=lower, ymax=upper), linetype=2, alpha=0.10)+
  ggtitle("Household Size")+
  geom_rug(data=access, aes(x=HHSize, y=5), position="jitter", alpha=0.075, sides="b",inherit.aes = F)+
  ylab("")+
  scale_x_continuous(limits=c(0,25))+
  scale_y_continuous(limits=c(0,.75), breaks = c(0, 0.25, 0.50, 0.75), labels = scales::percent_format())+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=16),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

fig_select_hhsize

# Exp ----

fig_select_exp <- ggplot(data=select_exp, aes(x=Monthly.Expenditure/1000, y=AAP)) + 
  geom_line(size=1.10)+
  geom_ribbon(aes(ymin=lower, ymax=upper), linetype=2, alpha=0.1)+
  ggtitle("Monthly Household Expenditure") +
  geom_rug(data=access, aes(x=m1_q32_month_expenditure/1000,y=.01), position="jitter", alpha=0.30, sides="b", inherit.aes = F) +
  scale_y_continuous(limits=c(0,.75), breaks = c(0, 0.25, 0.50, 0.75), labels = scales::percent_format())+
  scale_x_continuous(breaks = c(5, 20, 40, 60) , 
                     labels = scales::dollar_format(prefix="", suffix = "k INR", big.mark = ",")) +
  coord_cartesian(xlim = c(0, 60), clip="off") + 
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=13),
        axis.text.y = element_text(size=14),
        axis.title = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

# HH Head Age ---

select_age <- select_age %>%
  top_n(21) %>%
  mutate(AAP = as.numeric(as.character(AAP)),
         Age = as.numeric(as.character(Age)),
         lower = as.numeric(as.character(lower)),
         upper = as.numeric(as.character(upper)))

fig_select_age <- ggplot(data=select_age, aes(x=Age, y=AAP)) + 
  geom_line(size=1.10)+
  geom_ribbon(aes(ymin=lower, ymax=upper), linetype=2, alpha=0.1) +
  ggtitle("Household Head Age")+
  scale_x_continuous(limits=c(18,80), labels = scales::unit_format(suffix = " yrs"))+
  geom_rug(data=access, aes(x=m1_q19_age, y=5), position="jitter", alpha=0.05, sides="b",inherit.aes = F)+
  scale_y_continuous(limits=c(0,.75), breaks = c(0, 0.25, 0.50, 0.75), labels = scales::percent_format())+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=16),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

# Selection figure construction ------


selection <- plot_grid(fig_select_edu, fig_select_caste,fig_select_gender,
                       fig_select_exp,  fig_select_age, fig_select_hhsize,
                       ncol=3)


selection


selection_annotated <- annotate_figure(selection, # top = text_grob("a   LPG Adoption", size = 30, x=0.05, face = "bold"),
                                       left = text_grob("Predicted Probability of LPG Adoption", face = "bold", size = 18, rot = 90))

# Preview Figure 3 -------
selection_annotated


# ggsave2("~/Figure3_selection.png",
#         plot = selection_annotated,
#         dpi = 150,
#         height = 160, width = 310, units = "mm")



# Figure 4: Two stage model CONSUMPTION stage only --------

# caste ----

consume_caste <- consume_caste[1:4,] # drop extra rows
consume_caste$AAP <- consume_caste$AAP / 1000
consume_caste$lower <- consume_caste$lower / 1000
consume_caste$upper <- consume_caste$upper / 1000

fig_consume_caste <- ggplot(consume_caste, aes(x=Caste, y=AAP, group=Caste)) + 
  geom_point(position=pd, size=3, shape=4) +
  geom_errorbar(data=consume_caste, aes(ymin=lower, ymax=upper), width=.1, position=pd)+
  scale_y_continuous(limits=c(5,10.5))+
  ggtitle("Caste")+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=16),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

fig_consume_caste

# Education ----

consume_edu$Education <- as.character(consume_edu$Education)
consume_edu$Education <- ifelse(consume_edu$Education=="No Formal Schooling", "No Formal",
                               ifelse(consume_edu$Education=="Up to 5th Standard", "Up to 5th",
                                      ifelse(consume_edu$Education=="More than 5th Standard", "More than 5th",NA)))
consume_edu$Education <- factor(consume_edu$Education, levels=c("More than 5th", "Up to 5th", "No Formal"))


consume_edu <- consume_edu[1:3,]
consume_edu$AAP <- consume_edu$AAP / 1000
consume_edu$lower <- consume_edu$X / 1000
consume_edu$upper <- consume_edu$X.1 / 1000

fig_consume_edu <- ggplot(data=consume_edu, aes(x=Education, y=AAP, group=Education)) + 
  geom_point(position=pd, size=3,shape=4) + 
  geom_errorbar(data=consume_edu, aes(ymin=lower, ymax=upper), width=.1, position=pd)+
  scale_y_continuous(limits=c(5,10.5))+
  ggtitle("Household Head Education")+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=14),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

fig_consume_edu

# Gender ----

consume_gender$Gender <- as.character(consume_gender$Gender)
consume_gender[consume_gender=="Women"] <- "Woman"
consume_gender$Gender <- factor(consume_gender$Gender, levels=c("Woman", "Both", "Man"))

consume_gender$AAP <- consume_gender$AAP / 1000
consume_gender$lower <- consume_gender$lower / 1000
consume_gender$upper <- consume_gender$upper / 1000


fig_consume_gender <- ggplot(consume_gender, aes(x=Gender, y=AAP, group=Gender)) + 
  geom_point(position=pd, size=3, shape=4) +
  geom_errorbar(data=consume_gender, aes(ymin=lower, ymax=upper), width=.1, position=pd)+
  scale_y_continuous(limits=c(5,10.5))+
  ggtitle("Decision Maker Gender")+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=16),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

fig_consume_gender

# HH Size -----

quantile(access$HHSize, probs=c(0, 0.25, 0.5, 0.75, 1), na.rm=T)

consume_house <- consume_house %>%
  top_n(25) %>%
  select(Household.Size, AAP, lower, upper) %>%
  mutate(Household.Size = as.numeric(as.character(Household.Size)),
         AAP = as.numeric(as.character(AAP)),
         lower = as.numeric(as.character(lower)),
         upper = as.numeric(as.character(upper)))

consume_house$AAP <- consume_house$AAP / 1000
consume_house$lower <- consume_house$lower / 1000
consume_house$upper <- consume_house$upper / 1000

fig_consume_hhsize <- ggplot(data=consume_house, aes(x=Household.Size, y=AAP)) + 
  geom_line(size=1.10)+
  geom_ribbon(aes(ymin=lower, ymax=upper), linetype=2, alpha=0.10)+
  geom_rug(data=access, aes(x=HHSize, y=5), position="jitter", alpha=0.075, sides="b",inherit.aes = F)+
  scale_x_continuous(limits=c(0,25))+
  scale_y_continuous(limits=c(5,10.5))+
  ggtitle("Household Size")+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=16),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

fig_consume_hhsize

# Expenditures -----
consume_exp$AAP <- consume_exp$AAP / 1000
consume_exp$lower <- consume_exp$lower / 1000
consume_exp$upper <- consume_exp$upper / 1000

fig_consume_exp <- ggplot(data=consume_exp, aes(x=Monthly.Expenditure/1000, y=AAP)) + 
  geom_line(size=1.10)+
  geom_ribbon(aes(ymin=lower, ymax=upper), linetype=2, alpha=0.1)+
  ggtitle("Monthly Household Expenditures")+
  geom_rug(data=access, aes(x=m1_q32_month_expenditure/1000,y=.01), position="jitter", alpha=0.30, sides="b", inherit.aes = F) +
  scale_x_continuous(breaks = c(5, 20, 40, 60), 
                     labels = scales::dollar_format(prefix="", suffix = "k INR", big.mark = ",")) +
  scale_y_continuous(limits=c(5,10.5))+
  coord_cartesian(xlim=c(0,60), clip="off") + 
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=12),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))


# HH Head Age -----

consume_age <- consume_age %>%
  top_n(21) %>%
  select(Age, AAP, lower, upper) %>%
  mutate(Age = as.numeric(as.character(Age)),
         AAP = as.numeric(as.character(AAP)),
         lower = as.numeric(as.character(lower)),
         upper = as.numeric(as.character(upper)))


consume_age$AAP <- consume_age$AAP / 1000
consume_age$lower <- consume_age$lower / 1000
consume_age$upper <- consume_age$upper / 1000

fig_consume_age <- ggplot(data=consume_age, aes(x=Age, y=AAP)) + 
  geom_line(size=1.10)+
  geom_ribbon(aes(ymin=consume_age$lower, ymax=consume_age$upper), linetype=2, alpha=0.1)+
  ggtitle("Household Head Age")+
  scale_x_continuous(limits=c(18,80), labels = scales::unit_format(suffix = " yrs"))+
  scale_y_continuous(limits=c(5,10.5))+
  geom_rug(data=access, aes(x=access$m1_q19_age, y=5), position="jitter", alpha=0.05, sides="b",inherit.aes = F)+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=16),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))



# Years LPG -----

consume_yrs <- consume_yrs %>%
  top_n(21) %>%
  select(LPG.year, AAP, lower, upper) %>%
  mutate(LPG.year = as.numeric(as.character(LPG.year)),
         AAP = as.numeric(as.character(AAP)),
         lower = as.numeric(as.character(lower)),
         upper = as.numeric(as.character(upper)))

consume_yrs$AAP <- consume_yrs$AAP / 1000
consume_yrs$lower <- consume_yrs$lower / 1000
consume_yrs$upper <- consume_yrs$upper / 1000

fig_consume_lpgyrs <- ggplot(consume_yrs, aes(x=LPG.year, y=AAP)) + 
  geom_line(size=1.10)+
  geom_ribbon(aes(ymin=lower, ymax=upper), linetype=2, alpha=0.1)+
  ggtitle("Years cooking with LPG")+
  scale_x_continuous(limits=c(0,8), labels = scales::unit_format(suffix = " yrs",accuracy = 1))+
  scale_y_continuous(limits=c(5,10.5))+
  geom_rug(data=access, aes(x=m4_q103_1_lpg_year, y=5), position="jitter", alpha=0.075, sides="b",inherit.aes = F)+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=16),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

fig_consume_lpgyrs

# PMUY ----

consume_pmuy <- consume_pmuy[1:2,] # drop extra rows
consume_pmuy$AAP <- consume_pmuy$AAP / 1000
consume_pmuy$lower <- consume_pmuy$lower / 1000
consume_pmuy$upper <- consume_pmuy$upper / 1000
consume_pmuy$pmuy <- c("General Customer", "PMUY Beneficiary")

fig_consume_pmuy <- ggplot(consume_pmuy, aes(x=pmuy, y=AAP, group=pmuy)) + 
  geom_point(position=pd, size=3, shape=4) +
  geom_errorbar(data=consume_pmuy, aes(ymin=lower, ymax=upper), width=.1, position=pd)+
  ggtitle("Acquired LPG through PMUY?")+
  scale_y_continuous(limits=c(5,10.5))+
  theme_clean()+ 
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        plot.background = element_blank(),
        axis.text.x = element_text(size=13),
        axis.text.y = element_text(size=14),
        axis.title.y = element_blank(),
        plot.title = element_text(size=18, hjust=0.5))

fig_consume_pmuy

# Consumption figure production -----

consumption <- plot_grid(fig_consume_edu, fig_consume_caste, fig_consume_gender,
                         fig_consume_exp, fig_consume_age, fig_consume_hhsize,
                         fig_consume_lpgyrs, fig_consume_pmuy, NA,
                         ncol=3)

consumption

consumption_annotated <- annotate_figure(consumption, # top = text_grob("b   LPG Use", size = 30, x=0.1, y=0.5, face = "bold"),
                                         left = text_grob("Predicted Monthly Consumption of LPG (kg)",size = 18, face = "bold", rot = 90))

# Preview Figure 4 -------
consumption_annotated

# Save Figure 4 ------

# ggsave2("~/Figure4_consumption.png",
#         plot = consumption_annotated,
#         dpi = 150,
#         height = 225, width = 300, units = "mm")

